# Keep a positional subset of all_lineups for the shot-location plots.
# The selection has to contain Team, Year, `Regular Season or Playoffs`, Poss
# and `OFFENSE: All Three Frequency`, since the plots below use them.
# NOTE(review): columns are picked by position; verify against names(all_lineups).
shotlocations <- select(all_lineups, 1, 2, 3, 45, 69, 71, 73, 75, 77, 79, 81)

# Averages for Three frequency by year
averages <- filter(shotlocations, Team == "League Averages")
ggplot(averages) +
  geom_point(
    aes(
      x = Year,
      y = `OFFENSE: All Three Frequency`,
      color = `Regular Season or Playoffs`
    )
  )

# Three frequency by year (League Averages rows excluded)
minus_averages <- filter(shotlocations, Team != "League Averages")
ggplot(minus_averages) +
  geom_boxplot(
    aes(x = Year, y = `OFFENSE: All Three Frequency`, group = Year)
  )

# Possessions vs. Three Frequency, colored by year. Definitely only a start.
ggplot(minus_averages) +
  geom_point(
    aes(x = `OFFENSE: All Three Frequency`, y = Poss, color = Year)
  )
# Positional subset of all_lineups for the shooting-percentage plots.
# The plots below read Team, Year, Diff, `OFFENSE: eFG%` and
# `OFFENSE: All Three FG%` from it.
# NOTE(review): columns are picked by position; verify against names(all_lineups).
shotpercentages <- select(
  all_lineups,
  1, 2, 3, 45, 47, 51, 83, 85, 87, 89, 91, 93, 95
)
# Replaces the earlier minus_averages with the percentage columns.
minus_averages <- filter(shotpercentages, Team != "League Averages")
# EFG% By year
ggplot(minus_averages) +
  geom_boxplot(aes(x = Year, y = `OFFENSE: eFG%`, group = Year))
# 3% By year
ggplot(minus_averages) +
  geom_boxplot(aes(x = Year, y = `OFFENSE: All Three FG%`, group = Year))
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
# 3P% vs. Diff, colored by year (need better visualization)
# NOTE(review): this heading used to say "EFG%", but the x aesthetic is
# `OFFENSE: All Three FG%`; switch the column if eFG% was intended.
ggplot(minus_averages) +
  geom_point(aes(x = `OFFENSE: All Three FG%`, y = Diff, color = Year))
## Warning: Removed 1 rows containing missing values (geom_point).
# League-average rows with every column, for year-over-year trend plots.
# Each plot puts Year on the x axis and colors the points by
# `Regular Season or Playoffs`.
averages <- filter(all_lineups, Team == "League Averages")

# Three-point frequency
ggplot(averages) +
  geom_point(
    aes(Year, `OFFENSE: All Three Frequency`, color = `Regular Season or Playoffs`)
  )
# Effective field-goal percentage
ggplot(averages) +
  geom_point(
    aes(Year, `OFFENSE: eFG%`, color = `Regular Season or Playoffs`)
  )
# Halfcourt points per play
ggplot(averages) +
  geom_point(
    aes(Year, `HALFCOURT: Pts/Play`, color = `Regular Season or Playoffs`)
  )
# Offensive points per possession
ggplot(averages) +
  geom_point(
    aes(Year, `OFFENSE: Pts/Poss`, color = `Regular Season or Playoffs`)
  )
# Mid-range frequency
ggplot(averages) +
  geom_point(
    aes(Year, `OFFENSE: All Mid Frequency`, color = `Regular Season or Playoffs`)
  )
# See all differences: playoff mean minus regular-season mean for every
# numeric column (NAs ignored).
playoff_means <- colMeans(select_if(all_playoff_lineups, is.numeric), na.rm = TRUE)
regular_means <- colMeans(select_if(all_reg_lineups, is.numeric), na.rm = TRUE)
# Subtract by column name rather than by position, so the result stays correct
# even if the two data frames do not have the same set of numeric columns
# (plain `-` would silently misalign or recycle in that case).
shared_columns <- intersect(names(playoff_means), names(regular_means))
playoff_means[shared_columns] - regular_means[shared_columns]
## PG.Height
## 1.779330e-01
## PG.Age
## 9.651468e-01
## PG.Experience
## 1.142284e+00
## PG.GP
## 1.694219e+00
## PG.Draft.Pick
## -1.216340e+00
## SG.Height
## 5.279667e-02
## SG.Age
## 1.229555e+00
## SG.Experience
## 1.221778e+00
## SG.GP
## 7.433544e-01
## SG.Draft.Pick
## -6.178089e-01
## SF.Height
## 1.544587e-01
## SF.Age
## 1.019189e+00
## SF.Experience
## 1.216526e+00
## SF.GP
## 6.230867e-01
## SF.Draft.Pick
## -2.561142e-01
## PF.Height
## -1.081172e-01
## PF.Age
## 1.303010e+00
## PF.Experience
## 1.421132e+00
## PF.GP
## 7.630199e-01
## PF.Draft.Pick
## -2.113551e+00
## C.Height
## -4.587738e-02
## C.Age
## 1.017601e+00
## C.Experience
## 1.023301e+00
## C.GP
## 6.527488e-02
## Average.Height
## 4.569500e-02
## C.Draft.Pick
## -8.513709e-01
## Poss
## -4.043447e+01
## Diff.Rank
## 7.956715e-04
## Diff
## 1.470369e+00
## OFFENSE..Pts.Poss.Rank
## 6.736806e-03
## OFFENSE..Pts.Poss
## 1.060787e-01
## OFFENSE..eFG..Rank
## 3.858127e-02
## OFFENSE..eFG.
## -2.181954e-02
## OFFENSE..TOV..Rank
## 4.316671e-02
## OFFENSE..TOV.
## -4.696958e-01
## OFFENSE..ORB..Rank
## 5.016287e-02
## OFFENSE..ORB.
## -7.637928e-01
## OFFENSE..FT.Rate.Rank
## -1.612537e-02
## OFFENSE..FT.Rate
## 5.235421e-01
## DEFENSE..Pts.Poss.Rank
## 8.009880e-03
## DEFENSE..Pts.Poss
## -1.213584e+00
## DEFENSE..eFG..Rank
## 1.395101e-02
## DEFENSE..eFG.
## -7.183303e-01
## DEFENSE..TOV..Rank
## -1.482859e-02
## DEFENSE..TOV.
## -2.614435e-01
## DEFENSE..ORB..Rank
## -1.132153e-02
## DEFENSE..ORB.
## -2.026563e-01
## DEFENSE..FT.Rate.Rank
## -2.133406e-03
## DEFENSE..FT.Rate
## 2.741801e-01
## OFFENSE..Rim.Frequency.Rank
## 1.528875e-02
## OFFENSE..Rim.Frequency
## -1.553073e+00
## OFFENSE..Short.Mid.Frequency.Rank
## 3.532782e-02
## OFFENSE..Short.Mid.Frequency
## 7.309161e-01
## OFFENSE..Long.Mid.Frequency.Rank
## 2.593889e-02
## OFFENSE..Long.Mid.Frequency
## -1.283341e+00
## OFFENSE..All.Mid.Frequency.Rank
## 6.766262e-03
## OFFENSE..All.Mid.Frequency
## -5.512590e-01
## OFFENSE..Corner.Three.Frequency.Rank
## 2.678773e-02
## OFFENSE..Corner.Three.Frequency
## 6.547404e-01
## OFFENSE..Non.Corner.Three.Frequency.Rank
## 8.901463e-04
## OFFENSE..Non.Corner.Three.Frequency
## 1.451419e+00
## OFFENSE..All.Three.Frequency.Rank
## -4.997823e-03
## OFFENSE..All.Three.Frequency
## 2.105154e+00
## OFFENSE..Rim.FG..Rank
## 1.546656e-02
## OFFENSE..Rim.FG.
## -1.104168e-01
## OFFENSE..Short.Mid.FG.Rank
## 1.172367e-01
## OFFENSE..Short.Mid.FG.
## -2.179688e-02
## OFFENSE..Long.Mid.FG.Rank
## -5.363150e-02
## OFFENSE..Long.Mid.FG.
## -1.824210e-01
## OFFENSE..All.Mid.FG.Rank
## 1.369812e-02
## OFFENSE..All.Mid.FG.
## -1.200675e-01
## OFFENSE..Corner.Three.FG.Rank
## 8.086681e-02
## OFFENSE..Corner.Three.FG.
## 4.144015e-01
## OFFENSE..Non.Corner.Three.FG.Rank
## 2.419704e-02
## OFFENSE..Non.Corner.Three.FG.
## -8.910678e-02
## OFFENSE..All.Three.FG.Rank.
## -9.275052e-03
## OFFENSE..All.Three.FG.
## 1.998455e-01
## HALFCOURT..Pts.Play.Rank
## 2.800764e-02
## HALFCOURT..Pts.Play
## 7.931677e-01
## HALFCOURT..OREB..Rank
## -6.728328e-02
## HALFCOURT..OREB.
## -7.429195e-01
## HALFCOURT....of.Plays.Rank
## -8.897871e-04
## HALFCOURT....of.Plays
## 9.712880e-01
## PUTBACKS..Pts.Miss.Rank
## -1.508184e-02
## PUTBACKS..Pts.Miss
## -9.683087e-01
## PUTBACKS..Plays.Miss.Rank
## -8.432825e-02
## PUTBACKS..Plays.Miss
## -8.984441e-01
## PUTBACKS..Pts.Play.Rank
## 1.229566e-01
## PUTBACKS..Pts.Play
## 1.171665e+00
## DEFENSE..Rim.Frequency.Rank
## -1.478153e-02
## DEFENSE..Rim.Frequency
## -1.468974e+00
## DEFENSE..Short.Mid.Frequency.Rank
## 1.508256e-02
## DEFENSE..Short.Mid.Frequency
## 1.002798e+00
## DEFENSE..Long.Mid.Frequency.Rank
## 7.015560e-04
## DEFENSE..Long.Mid.Frequency
## -1.479147e+00
## DEFENSE..All.Mid.Frequency.Rank
## 2.470713e-02
## DEFENSE..All.Mid.Frequency
## -4.775742e-01
## DEFENSE..Corner.Three.Frequency.Rank
## -5.339441e-03
## DEFENSE..Corner.Three.Frequency
## 5.954205e-01
## DEFENSE..Non.Corner.Three.Frequency.Rank
## 2.379363e-02
## DEFENSE..Non.Corner.Three.Frequency
## 1.347696e+00
## DEFENSE..All.Three.Frequency.Rank
## 4.709370e-03
## DEFENSE..All.Three.Frequency
## 1.943700e+00
## DEFENSE..Rim.FG..Rank
## -1.260574e-02
## DEFENSE..Rim.FG.
## -8.784150e-01
## DEFENSE..Short.Mid.FG.Rank
## 7.125876e-02
## DEFENSE..Short.Mid.FG.
## -5.350151e-01
## DEFENSE..Long.Mid.FG.Rank
## -1.579582e-01
## DEFENSE..Long.Mid.FG.
## -7.599216e-02
## DEFENSE..All.Mid.FG.Rank
## -3.817930e-02
## DEFENSE..All.Mid.FG.
## -3.735670e-01
## DEFENSE..Corner.Three.FG.Rank
## 1.405120e-01
## DEFENSE..Corner.Three.FG.
## -4.941164e-01
## DEFENSE..Non.Corner.Three.FG.Rank
## -6.084534e-02
## DEFENSE..Non.Corner.Three.FG.
## -1.065350e+00
## DEFENSE..All.Three.FG.Rank.
## 8.783747e-02
## DEFENSE..All.Three.FG.
## -7.757598e-01
## DEFENSE.HALFCOURT..Pts.Play.Rank
## -4.838689e-03
## DEFENSE.HALFCOURT..Pts.Play
## -3.739931e-01
## DEFENSE.HALFCOURT..OREB..Rank
## -1.748035e-02
## DEFENSE.HALFCOURT..OREB.
## -3.043369e-01
## DEFENSE.HALFCOURT....of.Plays.Rank
## -4.539998e-02
## DEFENSE.HALFCOURT....of.Plays
## 1.031633e+00
## DEFENSE.PUTBACKS..Pts.Miss.Rank
## -3.840956e-02
## DEFENSE.PUTBACKS..Pts.Miss
## -8.977834e-01
## DEFENSE.PUTBACKS..Plays.Miss.Rank
## -2.352745e-02
## DEFENSE.PUTBACKS..Plays.Miss
## -5.533601e-01
## DEFENSE.PUTBACKS..Pts.Play.Rank
## -1.916918e-01
## DEFENSE.PUTBACKS..Pts.Play
## -1.859713e+00
# Players whose draft pick number is below 31.
firstrounders <- filter(all_players, `Draft Pick` < 31)
# Height of those first-round picks, one box per draft year
ggplot(firstrounders) +
  geom_boxplot(aes(x = `Draft Year`, y = `Height`))
# Players whose nationality is anything other than "United States".
foreign_players <- subset(all_players, Nationality != "United States")
foreign_players_with_dups <- subset(all_players_with_dups, Nationality != "United States")
# Share of each nationality among players, per year (bars scaled to 100%).
# The former `order = -as.numeric(y)` argument was dropped: ggplot() has no
# such parameter, so it was never applied (and no `y` exists here).
ggplot(all_players, aes(x = Year, fill = Nationality)) +
  geom_bar(stat = "count", position = "fill", show.legend = FALSE)
# Mean experience across the five lineup positions.
all_lineups <- all_lineups %>%
  mutate(
    AvgExp = (`PG Experience` + `SG Experience` + `SF Experience` +
      `PF Experience` + `C Experience`) / 5
  )
# Scatter of point differential against average experience, with the
# simple linear fit drawn on top.
plot(all_lineups$AvgExp, all_lineups$Diff)
mod1 <- lm(all_lineups$Diff ~ all_lineups$AvgExp)
abline(mod1)
summary(mod1)
##
## Call:
## lm(formula = all_lineups$Diff ~ all_lineups$AvgExp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -67.586 -8.709 0.149 8.824 52.012
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.5972 0.4647 -7.742 1.13e-14 ***
## all_lineups$AvgExp 1.1045 0.0790 13.982 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.92 on 6719 degrees of freedom
## (44 observations deleted due to missingness)
## Multiple R-squared: 0.02827, Adjusted R-squared: 0.02813
## F-statistic: 195.5 on 1 and 6719 DF, p-value: < 2.2e-16
# Diagnostic plots for the linear fit.
plot(mod1)
# Same response, now with a quadratic term in average experience.
mod2 <- lm(
  all_lineups$Diff ~ all_lineups$AvgExp + I(all_lineups$AvgExp^2)
)
summary(mod2)
##
## Call:
## lm(formula = all_lineups$Diff ~ all_lineups$AvgExp + I(all_lineups$AvgExp^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -64.058 -8.687 0.177 8.773 51.249
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.56321 0.96252 -8.897 < 2e-16 ***
## all_lineups$AvgExp 3.03104 0.33661 9.005 < 2e-16 ***
## I(all_lineups$AvgExp^2) -0.16134 0.02741 -5.887 4.13e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.88 on 6718 degrees of freedom
## (44 observations deleted due to missingness)
## Multiple R-squared: 0.03326, Adjusted R-squared: 0.03297
## F-statistic: 115.6 on 2 and 6718 DF, p-value: < 2.2e-16
# Pull the three fitted coefficients of mod2 (intercept, linear, quadratic)
# out of the "Estimate" column of its summary table.
B0 <- summary(mod2)$coefficients[1, "Estimate"]
B1 <- summary(mod2)$coefficients[2, "Estimate"]
B2 <- summary(mod2)$coefficients[3, "Estimate"]
# Redraw the scatter and overlay the fitted parabola.
plot(all_lineups$AvgExp, all_lineups$Diff)
curve(B0 + B1 * x + B2 * x^2, add = TRUE)
# Possessions played vs. point differential, one point per lineup row.
ggplot(all_lineups, aes(x = Poss, y = Diff)) +
  geom_point()
## Warning: Removed 38 rows containing missing values (geom_point).
# Regular-season rows only, plus the mean age of the five lineup positions.
q7lineups <- all_lineups %>%
  filter(`Regular Season or Playoffs` == "Regular Season") %>%
  mutate(`Average Age` = (`PG Age` + `SG Age` + `SF Age` + `PF Age` + `C Age`) / 5)
# Bars of Diff at each average age; rows sharing an x value are stacked.
# geom_col() is the identity-stat bar geom, so it draws what
# geom_histogram(stat = "identity") drew here without the
# "Ignoring unknown parameters: binwidth, bins, pad" warning.
ggplot(data = q7lineups) +
  geom_col(aes(x = `Average Age`, y = Diff))
## Warning: Removed 25 rows containing missing values (position_stack).
# Average lineup height over the years
ggplot(all_lineups, aes(Year, `Average Height`)) +
  geom_boxplot() +
  labs(title = "Avg lineup height")
## Warning: Removed 38 rows containing non-finite values (stat_boxplot).
# Average position height over the years
# Stack the five per-position height columns into Positions/Height pairs so
# a single plot can color the boxes by position.
height_dist <- pivot_longer(
  all_lineups,
  cols = c("PG Height", "SG Height", "SF Height", "PF Height", "C Height"),
  names_to = "Positions",
  values_to = "Height"
)
ggplot(height_dist, aes(Year, Height, color = Positions)) +
  geom_boxplot() +
  labs(title = "Avg position height")
## Warning: Removed 190 rows containing non-finite values (stat_boxplot).
# Too specific, so let's look position by position
ggplot(all_lineups, aes(Year, `PG Height`)) +
  geom_boxplot() +
  labs(title = "Avg PG height")
## Warning: Removed 38 rows containing non-finite values (stat_boxplot).
ggplot(all_lineups, aes(Year, `SG Height`)) +
  geom_boxplot() +
  labs(title = "Avg SG height")
## Warning: Removed 38 rows containing non-finite values (stat_boxplot).
ggplot(all_lineups, aes(Year, `SF Height`)) +
  geom_boxplot() +
  labs(title = "Avg SF height")
## Warning: Removed 38 rows containing non-finite values (stat_boxplot).
ggplot(all_lineups, aes(Year, `PF Height`)) +
  geom_boxplot() +
  labs(title = "Avg PF height")
## Warning: Removed 38 rows containing non-finite values (stat_boxplot).
ggplot(all_lineups, aes(Year, `C Height`)) +
  geom_boxplot() +
  labs(title = "Avg C height")
## Warning: Removed 38 rows containing non-finite values (stat_boxplot).
# Code undrafted as 121, may change later
undrafted_code <- 121
draft_pick_cols <- c(
  "PG Draft Pick", "SG Draft Pick", "SF Draft Pick",
  "PF Draft Pick", "C Draft Pick"
)
# Only real lineups are recoded; "League Averages" rows keep their NAs.
# Rows with a missing Team are left untouched as well (the old row-by-row
# loop stopped with an error on them). The mask covers every row of the data
# frame instead of relying on a hard-coded row count.
is_team_row <- !is.na(all_lineups$Team) & all_lineups$Team != "League Averages"
for (pick_col in draft_pick_cols) {
  picks <- all_lineups[[pick_col]]
  picks[is_team_row & is.na(picks)] <- undrafted_code
  all_lineups[[pick_col]] <- picks
}
# Mean draft pick across the five positions (NA if any position is still NA).
all_lineups <- mutate(
  all_lineups,
  `Average Draft Position` = (`PG Draft Pick` + `SG Draft Pick` +
    `SF Draft Pick` + `PF Draft Pick` + `C Draft Pick`) / 5
)
# Average Draft Position By Year
ggplot(all_lineups, aes(x = Year, y = `Average Draft Position`)) +
  geom_boxplot()
## Warning: Removed 38 rows containing non-finite values (stat_boxplot).
# Possessions vs. Average Draft Position
# (2-D density contours rather than individual points)
ggplot(all_lineups, aes(x = Poss, y = `Average Draft Position`)) +
  geom_density_2d()
## Warning: Removed 38 rows containing non-finite values (stat_density2d).
Our group decided to investigate Q1 and Q2 in further detail.
SHOW AT LEAST 2 TABLES OR FIGURES BELOW THAT EXPLORE ANSWERS FOR THE QUESTIONS YOU ARE INVESTIGATING FURTHER.
# Regular-season lineups without the "League Averages" rows.
all_reg_lineups_minus_averages <- all_reg_lineups %>%
  filter(Team != "League Averages")
# Drop the first column, Year, Team and the five player-name columns;
# everything that remains is offered to the model as a predictor of Diff.
modeldata <- subset(
  all_reg_lineups_minus_averages,
  select = -c(1, Year, Team, PG, SG, SF, PF, C)
)
# NOTE(review): Diff.Rank and the OFFENSE/DEFENSE Pts.Poss columns stay in
# modeldata, and the ANOVA table reports a residual sum of squares of about 9.
# Diff looks almost fully determined by those columns -- confirm whether they
# should be excluded before reading anything into the p-values.
Full <- lm(Diff ~ ., data = modeldata)
anova(Full)
## Analysis of Variance Table
##
## Response: Diff
## Df Sum Sq Mean Sq F value
## PG.Height 1 7 7 3.1128e+03
## PG.Age 1 12338 12338 5.2387e+06
## PG.Experience 1 743 743 3.1540e+05
## PG.GP 1 4753 4753 2.0180e+06
## PG.Draft.Year 32 14680 459 1.9478e+05
## PG.Draft.Pick 1 150 150 6.3651e+04
## PG.Nationality 25 12320 493 2.0923e+05
## SG.Height 1 743 743 3.1537e+05
## SG.Age 1 5373 5373 2.2812e+06
## SG.Experience 1 191 191 8.1006e+04
## SG.GP 1 768 768 3.2615e+05
## SG.Draft.Year 32 12695 397 1.6844e+05
## SG.Draft.Pick 1 320 320 1.3600e+05
## SG.Nationality 35 10324 295 1.2524e+05
## SF.Height 1 44 44 1.8629e+04
## SF.Age 1 606 606 2.5733e+05
## SF.Experience 1 171 171 7.2750e+04
## SF.GP 1 451 451 1.9161e+05
## SF.Draft.Year 32 9383 293 1.2449e+05
## SF.Draft.Pick 1 58 58 2.4787e+04
## SF.Nationality 45 13666 304 1.2894e+05
## PF.Height 1 45 45 1.9020e+04
## PF.Age 1 427 427 1.8113e+05
## PF.Experience 1 648 648 2.7519e+05
## PF.GP 1 305 305 1.2955e+05
## PF.Draft.Year 34 8841 260 1.1041e+05
## PF.Draft.Pick 1 6 6 2.7357e+03
## PF.Nationality 54 15032 278 1.1820e+05
## C.Height 1 21 21 9.0921e+03
## C.Age 1 319 319 1.3526e+05
## C.Experience 1 117 117 4.9602e+04
## C.GP 1 902 902 3.8302e+05
## C.Draft.Year 34 9514 280 1.1881e+05
## C.Draft.Pick 1 1073 1073 4.5579e+05
## C.Nationality 57 19000 333 1.4153e+05
## Poss 1 2392 2392 1.0155e+06
## Diff.Rank 1 597228 597228 2.5358e+08
## OFFENSE..Pts.Poss.Rank 1 508 508 2.1563e+05
## OFFENSE..Pts.Poss 1 7449 7449 3.1627e+06
## OFFENSE..eFG..Rank 1 0 0 4.1000e-03
## OFFENSE..eFG. 1 40 40 1.6801e+04
## OFFENSE..TOV..Rank 1 25 25 1.0437e+04
## OFFENSE..TOV. 1 81 81 3.4533e+04
## OFFENSE..ORB..Rank 1 2 2 9.2605e+02
## OFFENSE..ORB. 1 21 21 8.7752e+03
## OFFENSE..FT.Rate.Rank 1 61 61 2.5853e+04
## OFFENSE..FT.Rate 1 64 64 2.7080e+04
## DEFENSE..Pts.Poss.Rank 1 13782 13782 5.8516e+06
## DEFENSE..Pts.Poss 1 26404 26404 1.1211e+07
## DEFENSE..eFG..Rank 1 0 0 4.3185e+00
## DEFENSE..eFG. 1 0 0 1.9593e+00
## DEFENSE..TOV..Rank 1 0 0 4.9500e-02
## DEFENSE..TOV. 1 0 0 9.0300e-01
## DEFENSE..ORB..Rank 1 0 0 1.7328e+00
## DEFENSE..ORB. 1 0 0 5.9400e-02
## DEFENSE..FT.Rate.Rank 1 0 0 1.7530e-01
## DEFENSE..FT.Rate 1 0 0 3.2280e-01
## OFFENSE..Rim.Frequency.Rank 1 0 0 6.5180e-01
## OFFENSE..Rim.Frequency 1 0 0 6.2580e-01
## OFFENSE..Short.Mid.Frequency.Rank 1 0 0 6.5800e-02
## OFFENSE..Short.Mid.Frequency 1 0 0 3.6801e+00
## OFFENSE..Long.Mid.Frequency.Rank 1 0 0 2.2985e+00
## OFFENSE..Long.Mid.Frequency 1 0 0 7.8300e-02
## OFFENSE..All.Mid.Frequency.Rank 1 0 0 1.8460e-01
## OFFENSE..All.Mid.Frequency 1 0 0 3.5780e-01
## OFFENSE..Corner.Three.Frequency.Rank 1 0 0 5.9000e-03
## OFFENSE..Corner.Three.Frequency 1 0 0 6.0060e-01
## OFFENSE..Non.Corner.Three.Frequency.Rank 1 0 0 6.6684e+00
## OFFENSE..Non.Corner.Three.Frequency 1 0 0 3.1280e-01
## OFFENSE..All.Three.Frequency.Rank 1 0 0 3.6720e+00
## OFFENSE..All.Three.Frequency 1 0 0 1.9400e-01
## OFFENSE..Rim.FG..Rank 1 0 0 9.1749e+00
## OFFENSE..Rim.FG. 1 0 0 1.7890e+00
## OFFENSE..Short.Mid.FG.Rank 1 0 0 1.5172e+00
## OFFENSE..Short.Mid.FG. 1 0 0 3.0996e+00
## OFFENSE..Long.Mid.FG.Rank 1 0 0 2.1200e-02
## OFFENSE..Long.Mid.FG. 1 0 0 1.4620e+00
## OFFENSE..All.Mid.FG.Rank 1 0 0 2.2492e+00
## OFFENSE..All.Mid.FG. 1 0 0 9.0375e+00
## OFFENSE..Corner.Three.FG.Rank 1 0 0 2.4240e-01
## OFFENSE..Corner.Three.FG. 1 0 0 1.9070e-01
## OFFENSE..Non.Corner.Three.FG.Rank 1 0 0 1.6400e-02
## OFFENSE..Non.Corner.Three.FG. 1 0 0 9.4000e-02
## OFFENSE..All.Three.FG.Rank. 1 0 0 1.5057e+00
## OFFENSE..All.Three.FG. 1 0 0 1.6811e+00
## HALFCOURT..Pts.Play.Rank 1 0 0 1.6284e+00
## HALFCOURT..Pts.Play 1 0 0 1.3900e-02
## HALFCOURT..OREB..Rank 1 0 0 2.8300e-02
## HALFCOURT..OREB. 1 0 0 2.1000e-03
## HALFCOURT....of.Plays.Rank 1 0 0 2.3985e+00
## HALFCOURT....of.Plays 1 0 0 1.0000e-04
## PUTBACKS..Pts.Miss.Rank 1 0 0 4.8700e-02
## PUTBACKS..Pts.Miss 1 0 0 9.5000e-02
## PUTBACKS..Plays.Miss.Rank 1 0 0 2.1280e-01
## PUTBACKS..Plays.Miss 1 0 0 9.2780e-01
## PUTBACKS..Pts.Play.Rank 1 0 0 4.0400e-02
## PUTBACKS..Pts.Play 1 0 0 3.2670e+00
## DEFENSE..Rim.Frequency.Rank 1 0 0 6.6000e-03
## DEFENSE..Rim.Frequency 1 0 0 1.1240e-01
## DEFENSE..Short.Mid.Frequency.Rank 1 0 0 2.6380e-01
## DEFENSE..Short.Mid.Frequency 1 0 0 2.7700e-02
## DEFENSE..Long.Mid.Frequency.Rank 1 0 0 2.6470e+00
## DEFENSE..Long.Mid.Frequency 1 0 0 8.9200e-02
## DEFENSE..All.Mid.Frequency.Rank 1 0 0 5.6500e-02
## DEFENSE..All.Mid.Frequency 1 0 0 2.1527e+00
## DEFENSE..Corner.Three.Frequency.Rank 1 0 0 3.0000e-04
## DEFENSE..Corner.Three.Frequency 1 0 0 6.9600e-02
## DEFENSE..Non.Corner.Three.Frequency.Rank 1 0 0 2.6070e-01
## DEFENSE..Non.Corner.Three.Frequency 1 0 0 2.3100e-02
## DEFENSE..All.Three.Frequency.Rank 1 0 0 5.9700e-02
## DEFENSE..All.Three.Frequency 1 0 0 3.1840e-01
## DEFENSE..Rim.FG..Rank 1 0 0 2.7584e+00
## DEFENSE..Rim.FG. 1 0 0 7.0830e-01
## DEFENSE..Short.Mid.FG.Rank 1 0 0 2.5870e-01
## DEFENSE..Short.Mid.FG. 1 0 0 2.0360e-01
## DEFENSE..Long.Mid.FG.Rank 1 0 0 2.2255e+00
## DEFENSE..Long.Mid.FG. 1 0 0 1.1310e-01
## DEFENSE..All.Mid.FG.Rank 1 0 0 2.5500e-02
## DEFENSE..All.Mid.FG. 1 0 0 6.0500e-01
## DEFENSE..Corner.Three.FG.Rank 1 0 0 1.2921e+00
## DEFENSE..Corner.Three.FG. 1 0 0 3.3150e-01
## DEFENSE..Non.Corner.Three.FG.Rank 1 0 0 2.1840e-01
## DEFENSE..Non.Corner.Three.FG. 1 0 0 1.8877e+00
## DEFENSE..All.Three.FG.Rank. 1 0 0 7.7500e-02
## DEFENSE..All.Three.FG. 1 0 0 1.8000e-03
## DEFENSE.HALFCOURT..Pts.Play.Rank 1 0 0 4.4470e-01
## DEFENSE.HALFCOURT..Pts.Play 1 0 0 2.7745e+00
## DEFENSE.HALFCOURT..OREB..Rank 1 0 0 5.4000e-03
## DEFENSE.HALFCOURT..OREB. 1 0 0 4.4700e-01
## DEFENSE.HALFCOURT....of.Plays.Rank 1 0 0 2.6135e+00
## DEFENSE.HALFCOURT....of.Plays 1 0 0 3.1400e-02
## DEFENSE.PUTBACKS..Pts.Miss.Rank 1 0 0 3.8250e-01
## DEFENSE.PUTBACKS..Pts.Miss 1 0 0 3.9300e-02
## DEFENSE.PUTBACKS..Plays.Miss.Rank 1 0 0 2.0730e-01
## DEFENSE.PUTBACKS..Plays.Miss 1 0 0 7.9600e-02
## DEFENSE.PUTBACKS..Pts.Play.Rank 1 0 0 3.1390e-01
## DEFENSE.PUTBACKS..Pts.Play 1 0 0 1.0509e+00
## Residuals 3639 9 0
## Pr(>F)
## PG.Height < 2.2e-16 ***
## PG.Age < 2.2e-16 ***
## PG.Experience < 2.2e-16 ***
## PG.GP < 2.2e-16 ***
## PG.Draft.Year < 2.2e-16 ***
## PG.Draft.Pick < 2.2e-16 ***
## PG.Nationality < 2.2e-16 ***
## SG.Height < 2.2e-16 ***
## SG.Age < 2.2e-16 ***
## SG.Experience < 2.2e-16 ***
## SG.GP < 2.2e-16 ***
## SG.Draft.Year < 2.2e-16 ***
## SG.Draft.Pick < 2.2e-16 ***
## SG.Nationality < 2.2e-16 ***
## SF.Height < 2.2e-16 ***
## SF.Age < 2.2e-16 ***
## SF.Experience < 2.2e-16 ***
## SF.GP < 2.2e-16 ***
## SF.Draft.Year < 2.2e-16 ***
## SF.Draft.Pick < 2.2e-16 ***
## SF.Nationality < 2.2e-16 ***
## PF.Height < 2.2e-16 ***
## PF.Age < 2.2e-16 ***
## PF.Experience < 2.2e-16 ***
## PF.GP < 2.2e-16 ***
## PF.Draft.Year < 2.2e-16 ***
## PF.Draft.Pick < 2.2e-16 ***
## PF.Nationality < 2.2e-16 ***
## C.Height < 2.2e-16 ***
## C.Age < 2.2e-16 ***
## C.Experience < 2.2e-16 ***
## C.GP < 2.2e-16 ***
## C.Draft.Year < 2.2e-16 ***
## C.Draft.Pick < 2.2e-16 ***
## C.Nationality < 2.2e-16 ***
## Poss < 2.2e-16 ***
## Diff.Rank < 2.2e-16 ***
## OFFENSE..Pts.Poss.Rank < 2.2e-16 ***
## OFFENSE..Pts.Poss < 2.2e-16 ***
## OFFENSE..eFG..Rank 0.949253
## OFFENSE..eFG. < 2.2e-16 ***
## OFFENSE..TOV..Rank < 2.2e-16 ***
## OFFENSE..TOV. < 2.2e-16 ***
## OFFENSE..ORB..Rank < 2.2e-16 ***
## OFFENSE..ORB. < 2.2e-16 ***
## OFFENSE..FT.Rate.Rank < 2.2e-16 ***
## OFFENSE..FT.Rate < 2.2e-16 ***
## DEFENSE..Pts.Poss.Rank < 2.2e-16 ***
## DEFENSE..Pts.Poss < 2.2e-16 ***
## DEFENSE..eFG..Rank 0.037769 *
## DEFENSE..eFG. 0.161674
## DEFENSE..TOV..Rank 0.823888
## DEFENSE..TOV. 0.342036
## DEFENSE..ORB..Rank 0.188138
## DEFENSE..ORB. 0.807504
## DEFENSE..FT.Rate.Rank 0.675437
## DEFENSE..FT.Rate 0.569989
## OFFENSE..Rim.Frequency.Rank 0.419522
## OFFENSE..Rim.Frequency 0.428935
## OFFENSE..Short.Mid.Frequency.Rank 0.797591
## OFFENSE..Short.Mid.Frequency 0.055145 .
## OFFENSE..Long.Mid.Frequency.Rank 0.129586
## OFFENSE..Long.Mid.Frequency 0.779674
## OFFENSE..All.Mid.Frequency.Rank 0.667516
## OFFENSE..All.Mid.Frequency 0.549765
## OFFENSE..Corner.Three.Frequency.Rank 0.939028
## OFFENSE..Corner.Three.Frequency 0.438407
## OFFENSE..Non.Corner.Three.Frequency.Rank 0.009852 **
## OFFENSE..Non.Corner.Three.Frequency 0.575996
## OFFENSE..All.Three.Frequency.Rank 0.055411 .
## OFFENSE..All.Three.Frequency 0.659595
## OFFENSE..Rim.FG..Rank 0.002471 **
## OFFENSE..Rim.FG. 0.181135
## OFFENSE..Short.Mid.FG.Rank 0.218121
## OFFENSE..Short.Mid.FG. 0.078397 .
## OFFENSE..Long.Mid.FG.Rank 0.884185
## OFFENSE..Long.Mid.FG. 0.226695
## OFFENSE..All.Mid.FG.Rank 0.133770
## OFFENSE..All.Mid.FG. 0.002663 **
## OFFENSE..Corner.Three.FG.Rank 0.622501
## OFFENSE..Corner.Three.FG. 0.662373
## OFFENSE..Non.Corner.Three.FG.Rank 0.897956
## OFFENSE..Non.Corner.Three.FG. 0.759123
## OFFENSE..All.Three.FG.Rank. 0.219875
## OFFENSE..All.Three.FG. 0.194865
## HALFCOURT..Pts.Play.Rank 0.202009
## HALFCOURT..Pts.Play 0.906204
## HALFCOURT..OREB..Rank 0.866495
## HALFCOURT..OREB. 0.963471
## HALFCOURT....of.Plays.Rank 0.121542
## HALFCOURT....of.Plays 0.990295
## PUTBACKS..Pts.Miss.Rank 0.825334
## PUTBACKS..Pts.Miss 0.757982
## PUTBACKS..Plays.Miss.Rank 0.644642
## PUTBACKS..Plays.Miss 0.335487
## PUTBACKS..Pts.Play.Rank 0.840745
## PUTBACKS..Pts.Play 0.070769 .
## DEFENSE..Rim.Frequency.Rank 0.935340
## DEFENSE..Rim.Frequency 0.737496
## DEFENSE..Short.Mid.Frequency.Rank 0.607573
## DEFENSE..Short.Mid.Frequency 0.867884
## DEFENSE..Long.Mid.Frequency.Rank 0.103831
## DEFENSE..Long.Mid.Frequency 0.765168
## DEFENSE..All.Mid.Frequency.Rank 0.812208
## DEFENSE..All.Mid.Frequency 0.142408
## DEFENSE..Corner.Three.Frequency.Rank 0.986830
## DEFENSE..Corner.Three.Frequency 0.791957
## DEFENSE..Non.Corner.Three.Frequency.Rank 0.609683
## DEFENSE..Non.Corner.Three.Frequency 0.879244
## DEFENSE..All.Three.Frequency.Rank 0.806918
## DEFENSE..All.Three.Frequency 0.572602
## DEFENSE..Rim.FG..Rank 0.096834 .
## DEFENSE..Rim.FG. 0.400068
## DEFENSE..Short.Mid.FG.Rank 0.611032
## DEFENSE..Short.Mid.FG. 0.651831
## DEFENSE..Long.Mid.FG.Rank 0.135833
## DEFENSE..Long.Mid.FG. 0.736696
## DEFENSE..All.Mid.FG.Rank 0.873069
## DEFENSE..All.Mid.FG. 0.436729
## DEFENSE..Corner.Three.FG.Rank 0.255737
## DEFENSE..Corner.Three.FG. 0.564786
## DEFENSE..Non.Corner.Three.FG.Rank 0.640302
## DEFENSE..Non.Corner.Three.FG. 0.169542
## DEFENSE..All.Three.FG.Rank. 0.780702
## DEFENSE..All.Three.FG. 0.966376
## DEFENSE.HALFCOURT..Pts.Play.Rank 0.504902
## DEFENSE.HALFCOURT..Pts.Play 0.095862 .
## DEFENSE.HALFCOURT..OREB..Rank 0.941212
## DEFENSE.HALFCOURT..OREB. 0.503803
## DEFENSE.HALFCOURT....of.Plays.Rank 0.106043
## DEFENSE.HALFCOURT....of.Plays 0.859339
## DEFENSE.PUTBACKS..Pts.Miss.Rank 0.536288
## DEFENSE.PUTBACKS..Pts.Miss 0.842861
## DEFENSE.PUTBACKS..Plays.Miss.Rank 0.648947
## DEFENSE.PUTBACKS..Plays.Miss 0.777899
## DEFENSE.PUTBACKS..Pts.Play.Rank 0.575320
## DEFENSE.PUTBACKS..Pts.Play 0.305377
## Residuals
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Diagnostic plots for the full linear model.
plot(Full)
## Warning: not plotting observations with leverage one:
## 24, 259, 304, 341, 368, 391, 746, 983, 1432, 1501, 2041, 2055, 2694, 2814, 2959, 3334, 3404, 3574, 3734, 3757, 3774, 4000, 4142
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
# This is a graph that uses years of service (YOS) as a proxy for the year of entry in the league
# Bars are scaled to 100% so each one shows the nationality mix at that YOS.
# The former `order = -as.numeric(y)` argument was dropped: ggplot() has no
# such parameter, so it was never applied (and no `y` exists here).
ggplot(all_players, aes(x = YOS, fill = Nationality)) +
  geom_bar(stat = "count", position = "fill", show.legend = FALSE)
GIVE A 2 PARAGRAPH SUMMARY.
PARAGRAPH 1 SHOULD DESCRIBE WHAT YOU LEARNED ABOUT YOUR DATA FROM INVESTIGATING THE INITIAL QUESTIONS. DID YOU FIND ANYTHING UNUSUAL IN YOUR DATA? DID ANYTHING SURPRISE YOU? WHICH OF THE INITIAL QUESTIONS WERE HELPFUL IN LEADING YOU TO MORE QUESTIONS?
For Max’s questions, we found that there has been a significant increase in 3-point frequency over the past twenty years, and while there has not been an increase in 3-point percentage, there has been an increase in eFG%, meaning teams are prioritizing more efficient shots nowadays. This investigation prompted us to look into what predictors (other than 3-point percentage/frequency) create lineups that have higher point differentials. For Manzi’s questions, we used 5 variables to see if there were any stark differences between lineups for playoffs and regular season. For the 5 we selected, three-point frequency had the largest difference, but the others didn’t seem to have a major effect, which was surprising. It would be interesting to look at all variables and check the differences (like we did at the end of the code chunk). Secondly, it seems like there’s been a slight decline in the average height of first-round picks, but nothing drastic, which was surprising. We expected an increase. For Liam’s questions, a plot that gives an idea of the breakdown of player nationalities by year was made for question 1. It can be refined to make it clearer what proportion of the players have the nationality of the United States. The follow-up question would be to further break this down and try to control for draft year. That way we could get a proxy variable for what proportion of new players do not have the nationality of the United States, giving a good trend over time. Given the results of the model for the second question, this would be better for further study. It can still be seen, though with some difficulty at first, that over time US players have become less dominant in proportion. For the second question, different forms of models were tried to see if they fit better for predicting point differential of lineups based on average experience. Additionally, some transformations were tried to add more variables and see whether the adjusted R-squared improves from 0.02813.
Other plots used to check the data for potential use of a linear fit do not show some of the warning signs, but the sheer volume of points deviating from the linear fit depresses the R-squared value. As one can see from the second model, changing the model shape did not improve the fit visually. The summary of the fit is similarly poor, without much improvement; this indicates there is not much promise here for further exploration of this data set without further changes. For Jake’s questions, it appears that lineups that have played between 100 and 300 possessions have the greatest point differentials. Given that some lineups have played 1000s of possessions with each other, it appears that the relationship between possessions played and point differential is something other than a strong positive correlation. Secondly, it appears that lineups with an average age of under 25 tend to have negative point differentials, while lineups with an average age of above 25 tend to have positive point differentials. For Belin’s questions, we found that there has been a slight decline in the average lineup height over the past twenty years, but there haven’t been any significant changes in the average height per position. We also found that a large majority of teams’ average draft position for their most popular lineups is around 10-12, but there is also a cluster around the 40s range. We thought this was very interesting.
PARAGRAPH 2 SHOULD SUMMARIZE WHAT YOU LEARNED FROM INVESTIGATING THE FOLLOW-UP QUESTIONS. WHY ARE THESE FOLLOW-UP QUESTIONS INTERESTING FOR INVESTIGATION? DESCRIBE THE TABLES/FIGURES YOU USED TO EXPLORE ANSWERS TO THESE FOLLOW-UP QUESTIONS? WHAT DID YOU LEARN FROM THE TABLES/FIGURES REGARDING THE FOLLOW-UP QUESTIONS YOU PROPOSED?
Our first follow-up question is interesting because we can try to determine the success of regular season lineups through the prediction of a model. By running an ANOVA on a full model (minus predictors that wouldn’t make sense, like Year or player names), we can see that many of our variables have low p-values, indicating they may be useful for further investigation into prediction. Many of these predictors were offensive percentages from the field. If we wanted to continue building this model, we could use forward selection with these predictors. By plotting the Full model, we can clearly tell that it is nowhere near ready. The second question will continue to require more dataframe manipulation in order to form graphs to fully explore what we would like. However, it can be seen in the follow-up graph that players who have not been in the league as long more frequently identify with a nationality other than the United States. This finding could be important for teams and analysts trying to find new players and helping to set up established systems for getting international talent to the NBA. The graph clearly shows just how much less dominant the blue/purple section (USA) has become the less experience the players have. This makes it clear that a trend exists, and further examination of international players within specific sets of variables could lead to interesting, yet difficult to predict, findings.